library(readODS) #read ods files
library(readxl) #reads excel files
library(tidyverse) #Tidy packages
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.4.4     ✔ tibble    3.2.1
## ✔ lubridate 1.9.3     ✔ tidyr     1.3.1
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(dplyr)
library(ggplot2) #pretty plots
library(ggthemes) #themes for ggplot
library(RColorBrewer)
library(scales) #axis formatting options
## 
## Attaching package: 'scales'
## 
## The following object is masked from 'package:purrr':
## 
##     discard
## 
## The following object is masked from 'package:readr':
## 
##     col_factor
library(plotly) #interactive web graphs
## 
## Attaching package: 'plotly'
## 
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## 
## The following object is masked from 'package:stats':
## 
##     filter
## 
## The following object is masked from 'package:graphics':
## 
##     layout
library(networkD3) #interactive web network graphs

The National Atmospheric Emissions Inventory of the UK

Introduction to the inventory, with links kilotonnes of CO2 equivalents

# Map loop to download UK 2022 data from the UK Department for Energy Security and Net Zero
downloaded <- file.exists("UKGHG_2022.ods") #checks if file is downloaded in working directory
if(downloaded != T){ #if downloaded is not true
  map2("https://assets.publishing.service.gov.uk/media/65c0d54663a23d000dc821ca/final-greenhouse-gas-emissions-2022-by-source-dataset.ods", #update this link when new data available
                         "UKGHG_2022.ods", download.file)} else{print('data downloaded')} #name and download or print
## [1] "data downloaded"
#Read in UK 2022 data from the UK Department for Energy Security and Net Zero
GHG_UK22 <- read_ods(
  path = "UKGHG_2022.ods",
  sheet = 1, #define tab/sheet to read
  col_names = TRUE, #use header row for column names
  col_types = NULL, #guess data types
  na = "", #treat blank cells as NA
  skip = 0, #don't skip rows
  formula_as_formula = FALSE, #values only
  range = NULL, 
  row_names = FALSE, #no row names
  strings_as_factors = TRUE) %>% #use factors
  rename("Subsector" = "TES Subsector") #rename variable

#Create dataframe with only categorization
GHG_categories <- GHG_UK22[, c(3,6:12)] %>%
  distinct() 

#IPCC code and TES subsectors - for categorization
GHG_IPCC_TES <- GHG_UK22[, c(3,6,7)] %>%
  distinct() %>%
  rename("IPCC_code" = "IPCC Code") #rename variable to match other df

# Map loop to download UK Regional data 1990-2021 from the UK National Atmospheric Emissions Inventory
downloaded2 <- file.exists("GHG_regions.xlsx") #checks if file is downloaded in working directory
if(downloaded2 != T){ #if downloaded is not true
  map2("https://uk-air.defra.gov.uk/reports/cat09/2306200930_DA_GHGI_1990-2021_Final_v3.1.xlsx", #update this link when new data available
                        "GHG_regions.xlsx", download.file)} else{print('data downloaded')} #name and download or print
## [1] "data downloaded"
#Read in excel file, by source, with region
GHG_regions <- read_xlsx(
  "GHG_regions.xlsx", #excel file from current working directory
  sheet = "BySource_data", #define tab/sheet to read
  col_names = TRUE, #use header row for column names
  col_types = NULL, #guess data types
  na = "", #treat blank cells as NA
  trim_ws = TRUE, #trim any whitespace/unused columns and rows
  skip = 6, #skip 6 rows to get to pivot table data
  n_max = Inf, #set maximum number of rows to include
  guess_max = min(1000, Inf), #how many rows to use to guess data types
  progress = readxl_progress(), #display progress in reading in data
  .name_repair = "unique" #makes sure all column names not empty or duplicated
)

#Add subsector into GHG_regions, match format 
GHG_regions2 <- inner_join(GHG_IPCC_TES, GHG_regions,
    by='IPCC_code') %>% #use inner join to combine dataframes by a shared variable
  rename("GHG" = "Pollutant") %>% #rename variable to match UK only csv
  rename("NC_Sector" = "NCFormat") %>% #rename variable
  rename("TES_Sector" = "TES Sector") #rename variable

#Simplify regional dataset
GHG_simp <- GHG_regions2 %>%
  group_by(EmissionYear, TES_Sector, Subsector, RegionName, GHG) %>%
  summarize(Temissions = sum(Emission, na.rm = TRUE)) %>%
  rename("Sector" = "TES_Sector")
## `summarise()` has grouped output by 'EmissionYear', 'TES_Sector', 'Subsector',
## 'RegionName'. You can override using the `.groups` argument.
#2021 only
GHG_2021 <- GHG_simp %>%
    filter(EmissionYear == "2021") #filter to only 2021

#Grouped into all of UK
GHG_2021_joined <- GHG_2021 %>%
  group_by(Sector, Subsector, GHG) %>%
  summarize(Temissions = sum(Temissions, na.rm = TRUE))
## `summarise()` has grouped output by 'Sector', 'Subsector'. You can override
## using the `.groups` argument.

Visualization

#Define a custom palette
#Defra_Palette <- c("#E69F00", "#56B4E9", "#009E73", "#CC79A7")

Sankey Diagram

#Set up Sankey links & nodes dataframes
links <- data.frame(source=c(paste0(GHG_2021_joined$Sector), paste0(GHG_2021_joined$Subsector)), 
                    target=c(paste0(GHG_2021_joined$Subsector), paste0(GHG_2021_joined$GHG)),
                    value=(paste0(GHG_2021_joined$Temissions)))

links <- links[-c(83:85),] 

nodes <- data.frame(
  name=c(as.character(links$source), 
  as.character(links$target)) %>% unique())

#With networkD3, connection must be provided using id - So we need to reformat it.
links$IDsource <- match(links$source, nodes$name)-1 
links$IDtarget <- match(links$target, nodes$name)-1

#Plot with networkD3
sankey <- sankeyNetwork(Links = links, Nodes = nodes,
              Source = "IDsource", Target = "IDtarget",
              Value = "value", NodeID = "name",
              units = "kilotonnes CO2 eq.",
              sinksRight=FALSE, nodePadding = 8,
              iterations = 5)

sankey
library(rjson)

json_file <- "https://raw.githubusercontent.com/plotly/plotly.js/master/test/image/mocks/sankey_energy.json"
json_data <- fromJSON(paste(readLines(json_file), collapse=""))

fig <- plot_ly(
    type = "sankey",
    domain = list(
      x =  c(0,1),
      y =  c(0,1)
    ),
    orientation = "h",
    valueformat = ".0f",
    valuesuffix = "TWh",

    node = list(
      label = json_data$data[[1]]$node$label,
      color = json_data$data[[1]]$node$color,
      pad = 15,
      thickness = 15,
      line = list(
        color = "black",
        width = 0.5
      )
    ),

    link = list(
      source = json_data$data[[1]]$link$source,
      target = json_data$data[[1]]$link$target,
      value =  json_data$data[[1]]$link$value,
      label =  json_data$data[[1]]$link$label
    )
  ) 
fig <- fig %>% layout(
    title = "Energy forecast for 2050<br>Source: Department of Energy & Climate Change, Tom Counsell via <a href='https://bost.ocks.org/mike/sankey/'>Mike Bostock</a>",
    font = list(
      size = 10
    ),
    xaxis = list(showgrid = F, zeroline = F),
    yaxis = list(showgrid = F, zeroline = F)
)

fig
#Make into lists, add label list
linklist <- as.list(links)
linklist$value <- as.numeric(linklist$value)
linklist$label <- c("","","","","","","","","","","","","","","","","","","","",
                    "","","","","","","","","","","","","","","","","","","","",
                    "","","","","","","","","","","","","","","","","","","","",
                    "","","","","","","","","","","","","","","","","","","","",
                    "","","","","","","","","","","","","","","","","","","","",
                    "","","","","","","","","","","","","","","","","","","","",
                    "","","","","","","","","","","","","","","","","","","","",
                    "","","","","","","","","","","","","","","","","","","","",
                    "","","","","","","")

links_p <- plot_ly(links, type = list)

  
nodecolors <- nodes %>%
  mutate(color = c("#FFD700","#EA5F94","#9D02D7","#FA8775","#FFB14E","#CD34B5","#0000FF",
                   "#C9A600","#C7A600","#917800","#BEA767",
                   "#E5A0BE","#FF7ECB","#B42A66","#A7003B",
                   "#A400EC","#6B00B9","#631F90","#C860EA","#310074",
                   "#C35849",
                   "#C8811C","#FFAC1E","#BC8644","#9C4F00","#E1BA91",
                   "#774BE5","#2C15B2","#000097","#6836FF","#0000E3","#0000C7","#000096",
                   "#644540",
                   "#938E8B","#BFBFBF","#3B3734","#7A7979","#383838","#898887","#D3D1D0"))
nodelist <- as.list(nodecolors)

nodes_p <- plot_ly(nodecolors, type=list)

#Plot with Plotly
sankey2 <- plot_ly(type = "sankey",
                   orientation = "h",
                   valueformat = ".0f",
                   valuesuffix = "kt CO2 eq.",
      node = list(
        label = nodelist$name,
        color = nodelist$color,
        pad = 15,
        thickness = 20,
        line = list(color = "black", width = 0.5),
      link = list(
        source = linklist$IDsource,
        target = linklist$IDtarget,
        value = linklist$value,
        label = linklist$label)))

sankey2 <- sankey2 %>% layout(
    title = "Greenhouse Gas Emissions Per Sector in the UK",
    font = list(size = 10),
    xaxis = list(showgrid = F, zeroline = F),
    yaxis = list(showgrid = F, zeroline = F))

sankey2

Time Series Graph of Emissions by Sector

#Plot time series
Sect_time <- GHG_regions2 %>%
  group_by(EmissionYear, TES_Sector) %>%
  summarize(Temissions = sum(Emission, na.rm = TRUE)) %>%
  rename("Sector" = "TES_Sector") %>%
ggplot(aes(x=as.numeric(EmissionYear), y=Temissions, color=Sector)) +
  geom_line() +
  xlab("") + ylab("Greenhouse Gas Emissions (kilotonnes CO2 eq.)") +
  ggtitle("Greenhouse Gas Emissions of the UK per Sector Over Time") +
  theme_few() +
  scale_y_continuous(labels = comma) +
  scale_x_continuous(breaks = pretty(c(1990:2020), n=6)) +
  scale_color_brewer(palette = "Set2") +
  theme(legend.position="bottom")
## `summarise()` has grouped output by 'EmissionYear'. You can override using the
## `.groups` argument.
Sect_time #view ggplot

#Make interactive with Plotly
t_int <- ggplotly(Sect_time) %>% #convert to interactive graph
  layout(legend = list(
      orientation = "h"))
  
#Plotly customization
style(t_int, hovertemplate="Year: %{x:,.r}
CO2eq: %{y:,.4r} kt") #define what hover label says, round x values, round y values to 4 sig. figs

Bar Graphs per Sector and Subsector

Agriculture